{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EKS CSI FSx for Lustre Setup\n",
    "\n",
    "Amazon FSx for Lustre is a high-performance file system optimized for deep learning workloads. It provides POSIX-compliant file system access to data stored in S3 for multiple readers and writers simultaneously.\n",
    "  \n",
    "The Amazon FSx for Lustre Container Storage Interface (CSI) driver provides a CSI interface that allows Amazon EKS clusters to manage the lifecycle of Amazon FSx for Lustre file systems.  \n",
    "\n",
    "Reference: https://docs.aws.amazon.com/eks/latest/userguide/fsx-csi.html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Install the FSx CSI Driver for Kubernetes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m[ℹ]  eksctl version 0.27.0\n",
      "\u001b[0m\u001b[36m[ℹ]  using region us-west-2\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 31.122762ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 60.368558ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 190.17724ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 249.376ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 534.459072ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 1.18582624s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 2.344678848s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 5.099181568s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 11.512663296s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 19.470670848s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 31.43721984s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 1m37.290436608s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 2m23.99514624s\n",
      "\u001b[0mrequest expired, resigning\n",
      "\u001b[36m[ℹ]  IAM Open ID Connect provider is already associated with cluster \"cluster\" in \"us-west-2\"\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!eksctl utils associate-iam-oidc-provider \\\n",
    "    --region us-west-2 \\\n",
    "    --cluster cluster \\\n",
    "    --approve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    \u001b[94m\"Version\"\u001b[39;49;00m:\u001b[33m\"2012-10-17\"\u001b[39;49;00m,\n",
      "    \u001b[94m\"Statement\"\u001b[39;49;00m:[\n",
      "        {\n",
      "            \u001b[94m\"Effect\"\u001b[39;49;00m:\u001b[33m\"Allow\"\u001b[39;49;00m,\n",
      "            \u001b[94m\"Action\"\u001b[39;49;00m:[\n",
      "                \u001b[33m\"iam:CreateServiceLinkedRole\"\u001b[39;49;00m,\n",
      "                \u001b[33m\"iam:AttachRolePolicy\"\u001b[39;49;00m,\n",
      "                \u001b[33m\"iam:PutRolePolicy\"\u001b[39;49;00m\n",
      "            ],\n",
      "            \u001b[94m\"Resource\"\u001b[39;49;00m:\u001b[33m\"arn:aws:iam::*:role/aws-service-role/s3.data-source.lustre.fsx.amazonaws.com/*\"\u001b[39;49;00m\n",
      "        },\n",
      "        {\n",
      "            \u001b[94m\"Action\"\u001b[39;49;00m:\u001b[33m\"iam:CreateServiceLinkedRole\"\u001b[39;49;00m,\n",
      "            \u001b[94m\"Effect\"\u001b[39;49;00m:\u001b[33m\"Allow\"\u001b[39;49;00m,\n",
      "            \u001b[94m\"Resource\"\u001b[39;49;00m:\u001b[33m\"*\"\u001b[39;49;00m,\n",
      "            \u001b[94m\"Condition\"\u001b[39;49;00m:{\n",
      "                \u001b[94m\"StringLike\"\u001b[39;49;00m:{\n",
      "                    \u001b[94m\"iam:AWSServiceName\"\u001b[39;49;00m:[\n",
      "                        \u001b[33m\"fsx.amazonaws.com\"\u001b[39;49;00m\n",
      "                    ]\n",
      "                }\n",
      "            }\n",
      "        },\n",
      "        {\n",
      "            \u001b[94m\"Effect\"\u001b[39;49;00m:\u001b[33m\"Allow\"\u001b[39;49;00m,\n",
      "            \u001b[94m\"Action\"\u001b[39;49;00m:[\n",
      "                \u001b[33m\"s3:ListBucket\"\u001b[39;49;00m,\n",
      "                \u001b[33m\"fsx:CreateFileSystem\"\u001b[39;49;00m,\n",
      "                \u001b[33m\"fsx:DeleteFileSystem\"\u001b[39;49;00m,\n",
      "                \u001b[33m\"fsx:DescribeFileSystems\"\u001b[39;49;00m\n",
      "            ],\n",
      "            \u001b[94m\"Resource\"\u001b[39;49;00m:[\n",
      "                \u001b[33m\"*\"\u001b[39;49;00m\n",
      "            ]\n",
      "        }\n",
      "    ]\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-csi-fsx/fsx-csi-driver.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    \"Policy\": {\n",
      "        \"PolicyName\": \"Amazon_FSx_Lustre_CSI_Driver\",\n",
      "        \"PolicyId\": \"ANPATLVNRE7W3KS26ZFGO\",\n",
      "        \"Arn\": \"arn:aws:iam::231218423789:policy/Amazon_FSx_Lustre_CSI_Driver\",\n",
      "        \"Path\": \"/\",\n",
      "        \"DefaultVersionId\": \"v1\",\n",
      "        \"AttachmentCount\": 0,\n",
      "        \"PermissionsBoundaryUsageCount\": 0,\n",
      "        \"IsAttachable\": true,\n",
      "        \"CreateDate\": \"2020-10-29T14:48:18Z\",\n",
      "        \"UpdateDate\": \"2020-10-29T14:48:18Z\"\n",
      "    }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "!aws iam create-policy \\\n",
    "    --policy-name Amazon_FSx_Lustre_CSI_Driver \\\n",
    "    --policy-document file://eks-csi-fsx/fsx-csi-driver.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "policy_arn= 'arn:aws:iam::231218423789:policy/Amazon_FSx_Lustre_CSI_Driver'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m[ℹ]  eksctl version 0.27.0\n",
      "\u001b[0m\u001b[36m[ℹ]  using region us-west-2\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 44.729366ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 93.128318ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 124.550252ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 474.110008ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 796.961648ms\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 1.102759744s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 1.953915776s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 5.29014592s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 11.353636608s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 18.666825216s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 59.4691072s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 1m19.14287104s\n",
      "\u001b[0m\u001b[32m[!]  retryable error (RequestError: send request failed\n",
      "caused by: Put \"http://169.254.169.254/latest/api/token\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)) from ec2metadata/GetToken - will retry after delay of 2m32.973053952s\n",
      "\u001b[0mrequest expired, resigning\n",
      "\u001b[36m[ℹ]  1 iamserviceaccount (kube-system/fsx-csi-controller-sa) was included (based on the include/exclude rules)\n",
      "\u001b[0m\u001b[32m[!]  serviceaccounts that exists in Kubernetes will be excluded, use --override-existing-serviceaccounts to override\n",
      "\u001b[0m\u001b[36m[ℹ]  1 task: { 2 sequential sub-tasks: { create IAM role for serviceaccount \"kube-system/fsx-csi-controller-sa\", create serviceaccount \"kube-system/fsx-csi-controller-sa\" } }\n",
      "\u001b[0m\u001b[36m[ℹ]  building iamserviceaccount stack \"eksctl-cluster-addon-iamserviceaccount-kube-system-fsx-csi-controller-sa\"\n",
      "\u001b[0m\u001b[36m[ℹ]  deploying stack \"eksctl-cluster-addon-iamserviceaccount-kube-system-fsx-csi-controller-sa\"\n",
      "\u001b[0m\u001b[36m[ℹ]  created serviceaccount \"kube-system/fsx-csi-controller-sa\"\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!eksctl create iamserviceaccount \\\n",
    "    --region us-west-2 \\\n",
    "    --name fsx-csi-controller-sa \\\n",
    "    --namespace kube-system \\\n",
    "    --cluster cluster \\\n",
    "    --attach-policy-arn $policy_arn \\\n",
    "    --approve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "cf_stack_name = 'eksctl-cluster-addon-iamserviceaccount-kube-system-fsx-csi-controller-sa'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "role_name = 'eksctl-cluster-addon-iamserviceaccount-kube-Role1-7L9YH5501XP7'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "role_arn = 'arn:aws:iam::231218423789:role/eksctl-cluster-addon-iamserviceaccount-kube-Role1-7L9YH5501XP7'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deploy CSI Driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: kubectl apply should be used on resource created by either kubectl create --save-config or kubectl apply\n",
      "serviceaccount/fsx-csi-controller-sa configured\n",
      "clusterrole.rbac.authorization.k8s.io/fsx-csi-external-provisioner-role created\n",
      "clusterrolebinding.rbac.authorization.k8s.io/fsx-csi-external-provisioner-binding created\n",
      "deployment.apps/fsx-csi-controller created\n",
      "daemonset.apps/fsx-csi-node created\n",
      "csidriver.storage.k8s.io/fsx.csi.aws.com created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -k \"github.com/kubernetes-sigs/aws-fsx-csi-driver/deploy/kubernetes/overlays/stable/?ref=master\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:         fsx.csi.aws.com\n",
      "Namespace:    \n",
      "Labels:       <none>\n",
      "Annotations:  kubectl.kubernetes.io/last-applied-configuration:\n",
      "                {\"apiVersion\":\"storage.k8s.io/v1beta1\",\"kind\":\"CSIDriver\",\"metadata\":{\"annotations\":{},\"name\":\"fsx.csi.aws.com\"},\"spec\":{\"attachRequired\":...\n",
      "API Version:  storage.k8s.io/v1beta1\n",
      "Kind:         CSIDriver\n",
      "Metadata:\n",
      "  Creation Timestamp:  2020-10-29T15:04:13Z\n",
      "  Resource Version:    28721012\n",
      "  Self Link:           /apis/storage.k8s.io/v1beta1/csidrivers/fsx.csi.aws.com\n",
      "  UID:                 c7cc13b8-d931-47a4-8d6c-869d89085b03\n",
      "Spec:\n",
      "  Attach Required:    false\n",
      "  Pod Info On Mount:  false\n",
      "  Volume Lifecycle Modes:\n",
      "    Persistent\n",
      "Events:  <none>\n"
     ]
    }
   ],
   "source": [
    "!kubectl describe csidriver.storage.k8s.io/fsx.csi.aws.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "serviceaccount/fsx-csi-controller-sa annotated\n"
     ]
    }
   ],
   "source": [
    "!kubectl annotate serviceaccount -n kube-system fsx-csi-controller-sa \\\n",
    "    eks.amazonaws.com/role-arn=$role_arn --overwrite=true"
   ]
  },
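  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test the CSI Driver\n",
    "\n",
    "Create an S3 bucket and upload a small test file to it; the bucket is referenced by `s3ImportPath`/`s3ExportPath` in the storage class below."
   ]
  },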
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "make_bucket: fsx-csi-231218423789\n"
     ]
    }
   ],
   "source": [
    "!aws s3 mb s3://fsx-csi-231218423789"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite (not append) so re-running this cell always produces the same file contents.\n",
    "!echo test-file > testfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "upload: ./testfile to s3://fsx-csi-231218423789/export/testfile   \n"
     ]
    }
   ],
   "source": [
    "!aws s3 cp testfile s3://fsx-csi-231218423789/export/testfile"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download Storage Class Manifest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
      "100   336  100   336    0     0   1647      0 --:--:-- --:--:-- --:--:--  1639\n"
     ]
    }
   ],
   "source": [
    "#!curl -o storageclass.yaml https://raw.githubusercontent.com/kubernetes-sigs/aws-fsx-csi-driver/master/examples/kubernetes/dynamic_provisioning_s3/specs/storageclass.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[94mkind\u001b[39;49;00m: StorageClass\n",
      "\u001b[94mapiVersion\u001b[39;49;00m: storage.k8s.io/v1\n",
      "\u001b[94mmetadata\u001b[39;49;00m:\n",
      "  \u001b[94mname\u001b[39;49;00m: fsx-sc\n",
      "\u001b[94mprovisioner\u001b[39;49;00m: fsx.csi.aws.com\n",
      "\u001b[94mparameters\u001b[39;49;00m:\n",
      "  \u001b[94msubnetId\u001b[39;49;00m: subnet-0394df757ad94fdf0 \n",
      "  \u001b[94msecurityGroupIds\u001b[39;49;00m: sg-08de51a009f6896c4\n",
      "  \u001b[94mautoImportPolicy\u001b[39;49;00m: NEW\n",
      "  \u001b[94ms3ImportPath\u001b[39;49;00m: s3://fsx-csi-231218423789\n",
      "  \u001b[94ms3ExportPath\u001b[39;49;00m: s3://fsx-csi-231218423789\n",
      "  \u001b[94mdeploymentType\u001b[39;49;00m: SCRATCH_2\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-csi-fsx/storageclass.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "export VPC_ID=vpc-05cd6f5a363b289c2\n",
      "export SUBNET_ID=subnet-0394df757ad94fdf0\n",
      "sg-08de51a009f6896c4\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "\n",
    "source ~/.bash_profile\n",
    "\n",
    "# Abort early if the cluster name is missing; otherwise the VPC filter below matches nothing.\n",
    ": \"${AWS_CLUSTER_NAME:?AWS_CLUSTER_NAME must be set}\"\n",
    "\n",
    "#### Get VPC ID\n",
    "export VPC_ID=$(aws ec2 describe-vpcs --filters \"Name=tag:Name,Values=eksctl-${AWS_CLUSTER_NAME}-cluster/VPC\" --query \"Vpcs[0].VpcId\" --output text)\n",
    "# With --output text the AWS CLI prints the literal string None for a null query result.\n",
    "if [ -z \"${VPC_ID}\" ] || [ \"${VPC_ID}\" = \"None\" ]; then\n",
    "    echo \"No VPC tagged eksctl-${AWS_CLUSTER_NAME}-cluster/VPC was found\" >&2\n",
    "    exit 1\n",
    "fi\n",
    "echo \"export VPC_ID=${VPC_ID}\" | tee -a ~/.bash_profile\n",
    "\n",
    "#### Get Subnet ID\n",
    "export SUBNET_ID=$(aws ec2 describe-subnets --filters \"Name=vpc-id,Values=${VPC_ID}\" --query \"Subnets[0].SubnetId\" --output text)\n",
    "if [ -z \"${SUBNET_ID}\" ] || [ \"${SUBNET_ID}\" = \"None\" ]; then\n",
    "    echo \"No subnet found in VPC ${VPC_ID}\" >&2\n",
    "    exit 1\n",
    "fi\n",
    "echo \"export SUBNET_ID=${SUBNET_ID}\" | tee -a ~/.bash_profile\n",
    "\n",
    "# Prints the new security group ID; it is needed for the ingress rule and for storageclass.yaml.\n",
    "aws ec2 create-security-group --group-name eks-fsx-security-group4 --vpc-id ${VPC_ID} --description \"FSx for Lustre Security Group\" --query \"GroupId\" --output text\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "security_group='sg-08de51a009f6896c4'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws ec2 authorize-security-group-ingress --group-id $security_group --protocol tcp --port 988 --cidr 192.168.0.0/16"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## _Update storageclass.yaml_\n",
    "https://docs.aws.amazon.com/fsx/latest/LustreGuide/getting-started-step1.html\n",
    "  \n",
    "https://github.com/kubernetes-sigs/aws-fsx-csi-driver/blob/master/examples/kubernetes/dynamic_provisioning_s3/README.md"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[94mkind\u001b[39;49;00m: StorageClass\n",
      "\u001b[94mapiVersion\u001b[39;49;00m: storage.k8s.io/v1\n",
      "\u001b[94mmetadata\u001b[39;49;00m:\n",
      "  \u001b[94mname\u001b[39;49;00m: fsx-sc\n",
      "\u001b[94mprovisioner\u001b[39;49;00m: fsx.csi.aws.com\n",
      "\u001b[94mparameters\u001b[39;49;00m:\n",
      "  \u001b[94msubnetId\u001b[39;49;00m: subnet-0394df757ad94fdf0 \n",
      "  \u001b[94msecurityGroupIds\u001b[39;49;00m: sg-08de51a009f6896c4\n",
      "  \u001b[94mautoImportPolicy\u001b[39;49;00m: NEW\n",
      "  \u001b[94ms3ImportPath\u001b[39;49;00m: s3://fsx-csi-231218423789\n",
      "  \u001b[94ms3ExportPath\u001b[39;49;00m: s3://fsx-csi-231218423789\n",
      "  \u001b[94mdeploymentType\u001b[39;49;00m: SCRATCH_2\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-csi-fsx/storageclass.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "storageclass.storage.k8s.io \"fsx-sc\" deleted\n"
     ]
    }
   ],
   "source": [
    "!kubectl delete -f eks-csi-fsx/storageclass.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "storageclass.storage.k8s.io/fsx-sc created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -f eks-csi-fsx/storageclass.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                        PROVISIONER                                                RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE\n",
      "fsx-sc                      fsx.csi.aws.com                                            Delete          Immediate              false                  56s\n",
      "local-hostpath              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  9h\n",
      "openebs-device              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  9h\n",
      "openebs-hostpath            openebs.io/local                                           Delete          WaitForFirstConsumer   false                  9h\n",
      "openebs-jiva-default        openebs.io/provisioner-iscsi                               Delete          Immediate              false                  9h\n",
      "openebs-snapshot-promoter   volumesnapshot.external-storage.k8s.io/snapshot-promoter   Delete          Immediate              false                  9h\n"
     ]
    }
   ],
   "source": [
    "!kubectl get sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
      "100   188  100   188    0     0    949      0 --:--:-- --:--:-- --:--:--   944\n"
     ]
    }
   ],
   "source": [
    "#!curl -o claim.yaml https://raw.githubusercontent.com/kubernetes-sigs/aws-fsx-csi-driver/master/examples/kubernetes/dynamic_provisioning_s3/specs/claim.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[94mapiVersion\u001b[39;49;00m: v1\n",
      "\u001b[94mkind\u001b[39;49;00m: PersistentVolumeClaim\n",
      "\u001b[94mmetadata\u001b[39;49;00m:\n",
      "  \u001b[94mname\u001b[39;49;00m: fsx-claim\n",
      "\u001b[94mspec\u001b[39;49;00m:\n",
      "  \u001b[94maccessModes\u001b[39;49;00m:\n",
      "    - ReadWriteMany\n",
      "  \u001b[94mstorageClassName\u001b[39;49;00m: fsx-sc\n",
      "  \u001b[94mresources\u001b[39;49;00m:\n",
      "    \u001b[94mrequests\u001b[39;49;00m:\n",
      "      \u001b[94mstorage\u001b[39;49;00m: 1200Gi\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-csi-fsx/claim.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "persistentvolumeclaim \"fsx-claim\" deleted\n"
     ]
    }
   ],
   "source": [
    "!kubectl delete -f eks-csi-fsx/claim.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "persistentvolumeclaim/fsx-claim created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -f eks-csi-fsx/claim.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME        STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE\n",
      "fsx-claim   Bound    pvc-61624c47-dd5b-4214-9455-c55560294d53   1200Gi     RWX            fsx-sc         8m40s\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pvc fsx-claim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl describe pvc fsx-claim"
   ]
  },
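  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## _Wait for status == Bound_\n",
    "Re-run `kubectl get pvc fsx-claim` until `STATUS` shows `Bound` before deploying the pod; creating the FSx for Lustre file system can take several minutes."
   ]
  },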
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deploy Pod\n",
    "\n",
    "https://github.com/kubernetes-sigs/aws-fsx-csi-driver/tree/master/examples/kubernetes/dynamic_provisioning_s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pygmentize bert-csi-fsx.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pod/bert-csi-fsx created\n"
     ]
    }
   ],
   "source": [
    "!kubectl create -f bert-csi-fsx.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pod \"bert-csi-fsx\" deleted\n"
     ]
    }
   ],
   "source": [
    "!kubectl delete -f bert-csi-fsx.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME           READY   STATUS    RESTARTS   AGE\n",
      "bert-csi-fsx   1/1     Running   0          5s\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pod bert-csi-fsx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:         bert-csi-fsx\n",
      "Namespace:    kubeflow\n",
      "Priority:     0\n",
      "Node:         ip-192-168-67-206.us-west-2.compute.internal/192.168.67.206\n",
      "Start Time:   Thu, 29 Oct 2020 20:45:25 +0000\n",
      "Labels:       <none>\n",
      "Annotations:  kubernetes.io/psp: eks.privileged\n",
      "Status:       Running\n",
      "IP:           192.168.64.46\n",
      "Containers:\n",
      "  bert:\n",
      "    Container ID:  docker://637ab2a8bca91218962773895dac6bd904dc018702b713c94dd2071384ddec6a\n",
      "    Image:         763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.1.0-cpu-py36-ubuntu18.04\n",
      "    Image ID:      docker-pullable://763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training@sha256:4911ac31a130c68a2f92b72dd81d22bd02b542cc549c5652f22c1f24e702eaf5\n",
      "    Port:          <none>\n",
      "    Host Port:     <none>\n",
      "    Command:\n",
      "      python\n",
      "      /opt/ml/code/train.py\n",
      "      --train_steps_per_epoch=1\n",
      "      --epochs=1\n",
      "      --learning_rate=0.00001\n",
      "      --epsilon=0.00000001\n",
      "      --train_batch_size=36\n",
      "      --validation_batch_size=18\n",
      "      --test_batch_size=18\n",
      "      --train_steps_per_epoch=1\n",
      "      --validation_steps=1\n",
      "      --test_steps=1\n",
      "      --use_xla=True\n",
      "      --use_amp=False\n",
      "      --max_seq_length=64\n",
      "      --freeze_bert_layer=True\n",
      "      --enable_sagemaker_debugger=False\n",
      "      --enable_checkpointing=False\n",
      "      --enable_tensorboard=False\n",
      "      --run_validation=True\n",
      "      --run_test=False\n",
      "      --run_sample_predictions=False\n",
      "    State:          Running\n",
      "      Started:      Thu, 29 Oct 2020 20:45:27 +0000\n",
      "    Ready:          True\n",
      "    Restart Count:  0\n",
      "    Environment:\n",
      "      SM_TRAINING_ENV:        {\"is_master\":true}\n",
      "      SAGEMAKER_JOB_NAME:     tf-bert-training-eks\n",
      "      SM_CURRENT_HOST:        localhost\n",
      "      SM_NUM_GPUS:            0\n",
      "      SM_HOSTS:               {\"hosts\":\"localhost\"}\n",
      "      SM_MODEL_DIR:           /opt/ml/model/\n",
      "      SM_OUTPUT_DIR:          /opt/ml/output/\n",
      "      SM_OUTPUT_DATA_DIR:     /opt/ml/output/data/\n",
      "      SM_CHANNEL_TRAIN:       /opt/ml/input/data/train\n",
      "      SM_CHANNEL_VALIDATION:  /opt/ml/input/data/validation\n",
      "      SM_CHANNEL_TEST:        /opt/ml/input/data/test\n",
      "    Mounts:\n",
      "      /opt/ml/ from fsx-opt-ml (rw)\n",
      "      /var/run/secrets/kubernetes.io/serviceaccount from default-token-nnkjh (ro)\n",
      "Conditions:\n",
      "  Type              Status\n",
      "  Initialized       True \n",
      "  Ready             True \n",
      "  ContainersReady   True \n",
      "  PodScheduled      True \n",
      "Volumes:\n",
      "  fsx-opt-ml:\n",
      "    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)\n",
      "    ClaimName:  fsx-claim\n",
      "    ReadOnly:   false\n",
      "  default-token-nnkjh:\n",
      "    Type:        Secret (a volume populated by a Secret)\n",
      "    SecretName:  default-token-nnkjh\n",
      "    Optional:    false\n",
      "QoS Class:       BestEffort\n",
      "Node-Selectors:  <none>\n",
      "Tolerations:     node.kubernetes.io/not-ready:NoExecute for 300s\n",
      "                 node.kubernetes.io/unreachable:NoExecute for 300s\n",
      "Events:\n",
      "  Type    Reason     Age   From                                                   Message\n",
      "  ----    ------     ----  ----                                                   -------\n",
      "  Normal  Scheduled  10s   default-scheduler                                      Successfully assigned kubeflow/bert-csi-fsx to ip-192-168-67-206.us-west-2.compute.internal\n",
      "  Normal  Pulling    9s    kubelet, ip-192-168-67-206.us-west-2.compute.internal  Pulling image \"763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.1.0-cpu-py36-ubuntu18.04\"\n",
      "  Normal  Pulled     9s    kubelet, ip-192-168-67-206.us-west-2.compute.internal  Successfully pulled image \"763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.1.0-cpu-py36-ubuntu18.04\"\n",
      "  Normal  Created    9s    kubelet, ip-192-168-67-206.us-west-2.compute.internal  Created container bert\n",
      "  Normal  Started    8s    kubelet, ip-192-168-67-206.us-west-2.compute.internal  Started container bert\n"
     ]
    }
   ],
   "source": [
    "!kubectl describe pod bert-csi-fsx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting transformers==2.8.0\n",
      "  Downloading transformers-2.8.0-py3-none-any.whl (563 kB)\n",
      "Collecting tqdm>=4.27\n",
      "  Downloading tqdm-4.51.0-py2.py3-none-any.whl (70 kB)\n",
      "Collecting tokenizers==0.5.2\n",
      "  Downloading tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7 MB)\n",
      "Collecting dataclasses; python_version < \"3.7\"\n",
      "  Downloading dataclasses-0.7-py3-none-any.whl (18 kB)\n",
      "Collecting sentencepiece\n",
      "  Downloading sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1 MB)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (1.18.1)\n",
      "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (1.12.43)\n",
      "Collecting regex!=2019.12.17\n",
      "  Downloading regex-2020.10.28-cp36-cp36m-manylinux2010_x86_64.whl (666 kB)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (2.22.0)\n",
      "Collecting sacremoses\n",
      "  Downloading sacremoses-0.0.43.tar.gz (883 kB)\n",
      "Collecting filelock\n",
      "  Downloading filelock-3.0.12-py3-none-any.whl (7.6 kB)\n",
      "Requirement already satisfied: botocore<1.16.0,>=1.15.43 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (1.15.43)\n",
      "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (0.9.5)\n",
      "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (0.3.3)\n",
      "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (1.25.9)\n",
      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (3.0.4)\n",
      "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (2.8)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (2020.4.5.1)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (1.14.0)\n",
      "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (7.1.1)\n",
      "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (0.14.1)\n",
      "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.43->boto3->transformers==2.8.0) (0.15.2)\n",
      "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.43->boto3->transformers==2.8.0) (2.8.1)\n",
      "Building wheels for collected packages: sacremoses\n",
      "  Building wheel for sacremoses (setup.py): started\n",
      "  Building wheel for sacremoses (setup.py): finished with status 'done'\n",
      "  Created wheel for sacremoses: filename=sacremoses-0.0.43-py3-none-any.whl size=893259 sha256=f56cc45056714486837254b38560390e93016b909fc3aa2f778e503daabe5845\n",
      "  Stored in directory: /root/.cache/pip/wheels/49/25/98/cdea9c79b2d9a22ccc59540b1784b67f06b633378e97f58da2\n",
      "Successfully built sacremoses\n",
      "Installing collected packages: tqdm, tokenizers, dataclasses, sentencepiece, regex, sacremoses, filelock, transformers\n",
      "Successfully installed dataclasses-0.7 filelock-3.0.12 regex-2020.10.28 sacremoses-0.0.43 sentencepiece-0.1.94 tokenizers-0.5.2 tqdm-4.51.0 transformers-2.8.0\n",
      "WARNING: You are using pip version 20.0.2; however, version 20.2.4 is available.\n",
      "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n",
      "Requirement already satisfied: sagemaker-tensorflow==2.1.0.1.0.0 in /usr/local/lib/python3.6/dist-packages (2.1.0.1.0.0)\n",
      "WARNING: You are using pip version 20.0.2; however, version 20.2.4 is available.\n",
      "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n",
      "Collecting smdebug==0.9.3\n",
      "  Downloading smdebug-0.9.3-py2.py3-none-any.whl (174 kB)\n",
      "Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from smdebug==0.9.3) (20.3)\n",
      "Requirement already satisfied: boto3>=1.10.32 in /usr/local/lib/python3.6/dist-packages (from smdebug==0.9.3) (1.12.43)\n",
      "Requirement already satisfied: numpy<2.0.0,>1.16.0 in /usr/local/lib/python3.6/dist-packages (from smdebug==0.9.3) (1.18.1)\n",
      "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from smdebug==0.9.3) (3.11.3)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from packaging->smdebug==0.9.3) (1.14.0)\n",
      "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->smdebug==0.9.3) (2.4.7)\n",
      "Requirement already satisfied: botocore<1.16.0,>=1.15.43 in /usr/local/lib/python3.6/dist-packages (from boto3>=1.10.32->smdebug==0.9.3) (1.15.43)\n",
      "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3>=1.10.32->smdebug==0.9.3) (0.9.5)\n",
      "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3>=1.10.32->smdebug==0.9.3) (0.3.3)\n",
      "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.0->smdebug==0.9.3) (46.1.3)\n",
      "Requirement already satisfied: urllib3<1.26,>=1.20; python_version != \"3.4\" in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.43->boto3>=1.10.32->smdebug==0.9.3) (1.25.9)\n",
      "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.43->boto3>=1.10.32->smdebug==0.9.3) (2.8.1)\n",
      "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.43->boto3>=1.10.32->smdebug==0.9.3) (0.15.2)\n",
      "Installing collected packages: smdebug\n",
      "  Attempting uninstall: smdebug\n",
      "    Found existing installation: smdebug 0.7.2\n",
      "    Uninstalling smdebug-0.7.2:\n",
      "      Successfully uninstalled smdebug-0.7.2\n",
      "Successfully installed smdebug-0.9.3\n",
      "WARNING: You are using pip version 20.0.2; however, version 20.2.4 is available.\n",
      "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n",
      "Collecting scikit-learn==0.23.1\n",
      "  Downloading scikit_learn-0.23.1-cp36-cp36m-manylinux1_x86_64.whl (6.8 MB)\n",
      "Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.6/dist-packages (from scikit-learn==0.23.1) (1.4.1)\n",
      "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scikit-learn==0.23.1) (1.18.1)\n",
      "Collecting threadpoolctl>=2.0.0\n",
      "  Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n",
      "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn==0.23.1) (0.14.1)\n",
      "Installing collected packages: threadpoolctl, scikit-learn\n",
      "  Attempting uninstall: scikit-learn\n",
      "    Found existing installation: scikit-learn 0.22\n",
      "    Uninstalling scikit-learn-0.22:\n",
      "      Successfully uninstalled scikit-learn-0.22\n",
      "Successfully installed scikit-learn-0.23.1 threadpoolctl-2.1.0\n",
      "WARNING: You are using pip version 20.0.2; however, version 20.2.4 is available.\n",
      "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n",
      "Collecting matplotlib==3.2.1\n",
      "  Downloading matplotlib-3.2.1-cp36-cp36m-manylinux1_x86_64.whl (12.4 MB)\n",
      "Collecting kiwisolver>=1.0.1\n",
      "  Downloading kiwisolver-1.3.0-cp36-cp36m-manylinux2010_x86_64.whl (1.6 MB)\n",
      "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib==3.2.1) (2.4.7)\n",
      "Collecting cycler>=0.10\n",
      "  Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)\n",
      "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib==3.2.1) (2.8.1)\n",
      "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib==3.2.1) (1.18.1)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib==3.2.1) (1.14.0)\n",
      "Installing collected packages: kiwisolver, cycler, matplotlib\n",
      "Successfully installed cycler-0.10.0 kiwisolver-1.3.0 matplotlib-3.2.1\n",
      "WARNING: You are using pip version 20.0.2; however, version 20.2.4 is available.\n",
      "You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.\n",
      "Environment Variables:\n",
      "{'ADMISSION_WEBHOOK_SERVICE_PORT': 'tcp://10.100.209.74:443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP': 'tcp://10.100.209.74:443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_ADDR': '10.100.209.74',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_SERVICE_HOST': '10.100.209.74',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_SERVICE_PORT': '443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT': 'tcp://10.100.197.70:443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP': 'tcp://10.100.197.70:443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_ADDR': '10.100.197.70',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_SERVICE_HOST': '10.100.197.70',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_SERVICE_PORT': '443',\n",
      " 'ARGO_UI_PORT': 'tcp://10.100.90.211:80',\n",
      " 'ARGO_UI_PORT_80_TCP': 'tcp://10.100.90.211:80',\n",
      " 'ARGO_UI_PORT_80_TCP_ADDR': '10.100.90.211',\n",
      " 'ARGO_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ARGO_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ARGO_UI_SERVICE_HOST': '10.100.90.211',\n",
      " 'ARGO_UI_SERVICE_PORT': '80',\n",
      " 'CENTRALDASHBOARD_PORT': 'tcp://10.100.92.107:80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP': 'tcp://10.100.92.107:80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_ADDR': '10.100.92.107',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_PORT': '80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'CENTRALDASHBOARD_SERVICE_HOST': '10.100.92.107',\n",
      " 'CENTRALDASHBOARD_SERVICE_PORT': '80',\n",
      " 'DEBCONF_NONINTERACTIVE_SEEN': 'true',\n",
      " 'DEBIAN_FRONTEND': 'noninteractive',\n",
      " 'HDF5_USE_FILE_LOCKING': 'FALSE',\n",
      " 'HOME': '/root',\n",
      " 'HOSTNAME': 'bert-csi-fsx',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT': 'tcp://10.100.108.128:80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP': 'tcp://10.100.108.128:80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_ADDR': '10.100.108.128',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_PORT': '80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_HOST': '10.100.108.128',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_PORT': '80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_PORT_HTTP': '80',\n",
      " 'KATIB_CONTROLLER_PORT': 'tcp://10.100.237.187:443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP': 'tcp://10.100.237.187:443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_ADDR': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_PORT': '443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP': 'tcp://10.100.237.187:8080',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_ADDR': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_PORT': '8080',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'KATIB_CONTROLLER_SERVICE_HOST': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT': '443',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT_METRICS': '8080',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT_WEBHOOK': '443',\n",
      " 'KATIB_DB_MANAGER_PORT': 'tcp://10.100.165.95:6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP': 'tcp://10.100.165.95:6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_ADDR': '10.100.165.95',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_PORT': '6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_PROTO': 'tcp',\n",
      " 'KATIB_DB_MANAGER_SERVICE_HOST': '10.100.165.95',\n",
      " 'KATIB_DB_MANAGER_SERVICE_PORT': '6789',\n",
      " 'KATIB_DB_MANAGER_SERVICE_PORT_API': '6789',\n",
      " 'KATIB_MYSQL_PORT': 'tcp://10.100.54.100:3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP': 'tcp://10.100.54.100:3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_ADDR': '10.100.54.100',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_PORT': '3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'KATIB_MYSQL_SERVICE_HOST': '10.100.54.100',\n",
      " 'KATIB_MYSQL_SERVICE_PORT': '3306',\n",
      " 'KATIB_MYSQL_SERVICE_PORT_DBAPI': '3306',\n",
      " 'KATIB_UI_PORT': 'tcp://10.100.226.28:80',\n",
      " 'KATIB_UI_PORT_80_TCP': 'tcp://10.100.226.28:80',\n",
      " 'KATIB_UI_PORT_80_TCP_ADDR': '10.100.226.28',\n",
      " 'KATIB_UI_PORT_80_TCP_PORT': '80',\n",
      " 'KATIB_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'KATIB_UI_SERVICE_HOST': '10.100.226.28',\n",
      " 'KATIB_UI_SERVICE_PORT': '80',\n",
      " 'KATIB_UI_SERVICE_PORT_UI': '80',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT': 'tcp://10.100.12.171:8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP': 'tcp://10.100.12.171:8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_ADDR': '10.100.12.171',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_PORT': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_HOST': '10.100.12.171',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_PORT': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_PORT_HTTPS': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT': 'tcp://10.100.143.28:443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP': 'tcp://10.100.143.28:443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_ADDR': '10.100.143.28',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_SERVICE_HOST': '10.100.143.28',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_SERVICE_PORT': '443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT': 'tcp://10.100.245.215:443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP': 'tcp://10.100.245.215:443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_ADDR': '10.100.245.215',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_SERVICE_HOST': '10.100.245.215',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_SERVICE_PORT': '443',\n",
      " 'KMP_AFFINITY': 'granularity=fine,compact,1,0',\n",
      " 'KMP_BLOCKTIME': '1',\n",
      " 'KMP_DUPLICATE_LIB_OK': 'True',\n",
      " 'KMP_INIT_AT_FORK': 'FALSE',\n",
      " 'KMP_SETTINGS': '0',\n",
      " 'KUBERNETES_PORT': 'tcp://10.100.0.1:443',\n",
      " 'KUBERNETES_PORT_443_TCP': 'tcp://10.100.0.1:443',\n",
      " 'KUBERNETES_PORT_443_TCP_ADDR': '10.100.0.1',\n",
      " 'KUBERNETES_PORT_443_TCP_PORT': '443',\n",
      " 'KUBERNETES_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KUBERNETES_SERVICE_HOST': '10.100.0.1',\n",
      " 'KUBERNETES_SERVICE_PORT': '443',\n",
      " 'KUBERNETES_SERVICE_PORT_HTTPS': '443',\n",
      " 'LANG': 'C.UTF-8',\n",
      " 'LC_ALL': 'C.UTF-8',\n",
      " 'LD_LIBRARY_PATH': '/usr/local/openmpi/lib:',\n",
      " 'METADATA_DB_PORT': 'tcp://10.100.185.144:3306',\n",
      " 'METADATA_DB_PORT_3306_TCP': 'tcp://10.100.185.144:3306',\n",
      " 'METADATA_DB_PORT_3306_TCP_ADDR': '10.100.185.144',\n",
      " 'METADATA_DB_PORT_3306_TCP_PORT': '3306',\n",
      " 'METADATA_DB_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'METADATA_DB_SERVICE_HOST': '10.100.185.144',\n",
      " 'METADATA_DB_SERVICE_PORT': '3306',\n",
      " 'METADATA_DB_SERVICE_PORT_DBAPI': '3306',\n",
      " 'METADATA_ENVOY_SERVICE_PORT': 'tcp://10.100.187.207:9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP': 'tcp://10.100.187.207:9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_ADDR': '10.100.187.207',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_PORT': '9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_PROTO': 'tcp',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_HOST': '10.100.187.207',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_PORT': '9090',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_PORT_MD_ENVOY': '9090',\n",
      " 'METADATA_GRPC_SERVICE_PORT': 'tcp://10.100.230.18:8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP': 'tcp://10.100.230.18:8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_ADDR': '10.100.230.18',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_PORT': '8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_HOST': '10.100.230.18',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_PORT': '8080',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_PORT_GRPC_BACKENDAPI': '8080',\n",
      " 'METADATA_SERVICE_PORT': 'tcp://10.100.222.237:8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP': 'tcp://10.100.222.237:8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_ADDR': '10.100.222.237',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_PORT': '8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'METADATA_SERVICE_SERVICE_HOST': '10.100.222.237',\n",
      " 'METADATA_SERVICE_SERVICE_PORT': '8080',\n",
      " 'METADATA_SERVICE_SERVICE_PORT_BACKENDAPI': '8080',\n",
      " 'METADATA_UI_PORT': 'tcp://10.100.5.220:80',\n",
      " 'METADATA_UI_PORT_80_TCP': 'tcp://10.100.5.220:80',\n",
      " 'METADATA_UI_PORT_80_TCP_ADDR': '10.100.5.220',\n",
      " 'METADATA_UI_PORT_80_TCP_PORT': '80',\n",
      " 'METADATA_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'METADATA_UI_SERVICE_HOST': '10.100.5.220',\n",
      " 'METADATA_UI_SERVICE_PORT': '80',\n",
      " 'MINIO_SERVICE_PORT': 'tcp://10.100.139.25:9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP': 'tcp://10.100.139.25:9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_ADDR': '10.100.139.25',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_PORT': '9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_PROTO': 'tcp',\n",
      " 'MINIO_SERVICE_SERVICE_HOST': '10.100.139.25',\n",
      " 'MINIO_SERVICE_SERVICE_PORT': '9000',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT': 'tcp://10.100.42.220:8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP': 'tcp://10.100.42.220:8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_ADDR': '10.100.42.220',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_PORT': '8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST': '10.100.42.220',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT': '8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT_HTTP': '8888',\n",
      " 'ML_PIPELINE_PORT': 'tcp://10.100.44.146:8888',\n",
      " 'ML_PIPELINE_PORT_8887_TCP': 'tcp://10.100.44.146:8887',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_ADDR': '10.100.44.146',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_PORT': '8887',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_PORT_8888_TCP': 'tcp://10.100.44.146:8888',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_ADDR': '10.100.44.146',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_PORT': '8888',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_SERVICE_HOST': '10.100.44.146',\n",
      " 'ML_PIPELINE_SERVICE_PORT': '8888',\n",
      " 'ML_PIPELINE_SERVICE_PORT_GRPC': '8887',\n",
      " 'ML_PIPELINE_SERVICE_PORT_HTTP': '8888',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT': 'tcp://10.100.204.167:80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP': 'tcp://10.100.204.167:80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_ADDR': '10.100.204.167',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_SERVICE_HOST': '10.100.204.167',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_SERVICE_PORT': '80',\n",
      " 'ML_PIPELINE_UI_PORT': 'tcp://10.100.245.123:80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP': 'tcp://10.100.245.123:80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_ADDR': '10.100.245.123',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_UI_SERVICE_HOST': '10.100.245.123',\n",
      " 'ML_PIPELINE_UI_SERVICE_PORT': '80',\n",
      " 'MYSQL_PORT': 'tcp://10.100.102.255:3306',\n",
      " 'MYSQL_PORT_3306_TCP': 'tcp://10.100.102.255:3306',\n",
      " 'MYSQL_PORT_3306_TCP_ADDR': '10.100.102.255',\n",
      " 'MYSQL_PORT_3306_TCP_PORT': '3306',\n",
      " 'MYSQL_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'MYSQL_SERVICE_HOST': '10.100.102.255',\n",
      " 'MYSQL_SERVICE_PORT': '3306',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT': 'tcp://10.100.186.94:443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP': 'tcp://10.100.186.94:443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_ADDR': '10.100.186.94',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_SERVICE_HOST': '10.100.186.94',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_SERVICE_PORT': '443',\n",
      " 'PATH': '/usr/local/openmpi/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin',\n",
      " 'PROFILES_KFAM_PORT': 'tcp://10.100.70.20:8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP': 'tcp://10.100.70.20:8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_ADDR': '10.100.70.20',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_PORT': '8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_PROTO': 'tcp',\n",
      " 'PROFILES_KFAM_SERVICE_HOST': '10.100.70.20',\n",
      " 'PROFILES_KFAM_SERVICE_PORT': '8081',\n",
      " 'PYTHONDONTWRITEBYTECODE': '1',\n",
      " 'PYTHONIOENCODING': 'UTF-8',\n",
      " 'PYTHONUNBUFFERED': '1',\n",
      " 'PYTORCH_OPERATOR_PORT': 'tcp://10.100.47.179:8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP': 'tcp://10.100.47.179:8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_ADDR': '10.100.47.179',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_PORT': '8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'PYTORCH_OPERATOR_SERVICE_HOST': '10.100.47.179',\n",
      " 'PYTORCH_OPERATOR_SERVICE_PORT': '8443',\n",
      " 'PYTORCH_OPERATOR_SERVICE_PORT_MONITORING_PORT': '8443',\n",
      " 'SAGEMAKER_JOB_NAME': 'tf-bert-training-eks',\n",
      " 'SAGEMAKER_TRAINING_MODULE': 'sagemaker_tensorflow_container.training:main',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT': 'tcp://10.100.52.235:443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP': 'tcp://10.100.52.235:443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_ADDR': '10.100.52.235',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'SELDON_WEBHOOK_SERVICE_SERVICE_HOST': '10.100.52.235',\n",
      " 'SELDON_WEBHOOK_SERVICE_SERVICE_PORT': '443',\n",
      " 'SM_CHANNEL_TEST': '/opt/ml/input/data/test',\n",
      " 'SM_CHANNEL_TRAIN': '/opt/ml/input/data/train',\n",
      " 'SM_CHANNEL_VALIDATION': '/opt/ml/input/data/validation',\n",
      " 'SM_CURRENT_HOST': 'localhost',\n",
      " 'SM_HOSTS': '{\"hosts\":\"localhost\"}',\n",
      " 'SM_MODEL_DIR': '/opt/ml/model/',\n",
      " 'SM_NUM_GPUS': '0',\n",
      " 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data/',\n",
      " 'SM_OUTPUT_DIR': '/opt/ml/output/',\n",
      " 'SM_TRAINING_ENV': '{\"is_master\":true}',\n",
      " 'TENSORBOARD_PORT': 'tcp://10.100.185.101:9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP': 'tcp://10.100.185.101:9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP_ADDR': '10.100.185.101',\n",
      " 'TENSORBOARD_PORT_9000_TCP_PORT': '9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP_PROTO': 'tcp',\n",
      " 'TENSORBOARD_SERVICE_HOST': '10.100.185.101',\n",
      " 'TENSORBOARD_SERVICE_PORT': '9000',\n",
      " 'TENSORBOARD_SERVICE_PORT_TB': '9000',\n",
      " 'TF_JOB_OPERATOR_PORT': 'tcp://10.100.5.156:8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP': 'tcp://10.100.5.156:8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_ADDR': '10.100.5.156',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_PORT': '8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'TF_JOB_OPERATOR_SERVICE_HOST': '10.100.5.156',\n",
      " 'TF_JOB_OPERATOR_SERVICE_PORT': '8443',\n",
      " 'TF_JOB_OPERATOR_SERVICE_PORT_MONITORING_PORT': '8443'}\n",
      "Listing /opt...\n",
      "/opt/ml,['code', 'input', 'model', 'export'],part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord\n",
      "/opt/ml,['code', 'input', 'model', 'export'],part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord\n",
      "/opt/ml/code,[],train.py\n",
      "/opt/ml/input/data/validation,[],part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord\n",
      "/opt/ml/input/data/validation,[],part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord\n",
      "/opt/ml/input/data/test,[],part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord\n",
      "/opt/ml/input/data/test,[],part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord\n",
      "/opt/ml/input/data/train,[],part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord\n",
      "/opt/ml/input/data/train,[],part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord\n",
      "/opt/ml/model/transformers/fine-tuned,[],config.json\n",
      "/opt/ml/model/transformers/fine-tuned,[],tf_model.h5\n",
      "/opt/ml/export,[],testfile\n",
      "Done.\n",
      "Args:\n",
      "Namespace(checkpoint_base_path='/opt/ml/checkpoints', current_host='localhost', enable_checkpointing=False, enable_sagemaker_debugger=False, enable_tensorboard=False, epochs=1, epsilon=1e-08, freeze_bert_layer=True, hosts={'hosts': 'localhost'}, learning_rate=1e-05, max_seq_length=64, num_gpus=0, output_data_dir='/opt/ml/output/data/', output_dir='/opt/ml/output/', run_sample_predictions=False, run_test=False, run_validation=True, test_batch_size=18, test_data='/opt/ml/input/data/test', test_steps=1, train_batch_size=36, train_data='/opt/ml/input/data/train', train_steps_per_epoch=1, use_amp=False, use_xla=True, validation_batch_size=18, validation_data='/opt/ml/input/data/validation', validation_steps=1)\n",
      "Environment Variables:\n",
      "{'ADMISSION_WEBHOOK_SERVICE_PORT': 'tcp://10.100.209.74:443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP': 'tcp://10.100.209.74:443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_ADDR': '10.100.209.74',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_SERVICE_HOST': '10.100.209.74',\n",
      " 'ADMISSION_WEBHOOK_SERVICE_SERVICE_PORT': '443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT': 'tcp://10.100.197.70:443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP': 'tcp://10.100.197.70:443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_ADDR': '10.100.197.70',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_SERVICE_HOST': '10.100.197.70',\n",
      " 'APPLICATION_CONTROLLER_SERVICE_SERVICE_PORT': '443',\n",
      " 'ARGO_UI_PORT': 'tcp://10.100.90.211:80',\n",
      " 'ARGO_UI_PORT_80_TCP': 'tcp://10.100.90.211:80',\n",
      " 'ARGO_UI_PORT_80_TCP_ADDR': '10.100.90.211',\n",
      " 'ARGO_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ARGO_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ARGO_UI_SERVICE_HOST': '10.100.90.211',\n",
      " 'ARGO_UI_SERVICE_PORT': '80',\n",
      " 'CENTRALDASHBOARD_PORT': 'tcp://10.100.92.107:80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP': 'tcp://10.100.92.107:80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_ADDR': '10.100.92.107',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_PORT': '80',\n",
      " 'CENTRALDASHBOARD_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'CENTRALDASHBOARD_SERVICE_HOST': '10.100.92.107',\n",
      " 'CENTRALDASHBOARD_SERVICE_PORT': '80',\n",
      " 'DEBCONF_NONINTERACTIVE_SEEN': 'true',\n",
      " 'DEBIAN_FRONTEND': 'noninteractive',\n",
      " 'HDF5_USE_FILE_LOCKING': 'FALSE',\n",
      " 'HOME': '/root',\n",
      " 'HOSTNAME': 'bert-csi-fsx',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT': 'tcp://10.100.108.128:80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP': 'tcp://10.100.108.128:80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_ADDR': '10.100.108.128',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_PORT': '80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_HOST': '10.100.108.128',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_PORT': '80',\n",
      " 'JUPYTER_WEB_APP_SERVICE_SERVICE_PORT_HTTP': '80',\n",
      " 'KATIB_CONTROLLER_PORT': 'tcp://10.100.237.187:443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP': 'tcp://10.100.237.187:443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_ADDR': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_PORT': '443',\n",
      " 'KATIB_CONTROLLER_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP': 'tcp://10.100.237.187:8080',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_ADDR': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_PORT': '8080',\n",
      " 'KATIB_CONTROLLER_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'KATIB_CONTROLLER_SERVICE_HOST': '10.100.237.187',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT': '443',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT_METRICS': '8080',\n",
      " 'KATIB_CONTROLLER_SERVICE_PORT_WEBHOOK': '443',\n",
      " 'KATIB_DB_MANAGER_PORT': 'tcp://10.100.165.95:6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP': 'tcp://10.100.165.95:6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_ADDR': '10.100.165.95',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_PORT': '6789',\n",
      " 'KATIB_DB_MANAGER_PORT_6789_TCP_PROTO': 'tcp',\n",
      " 'KATIB_DB_MANAGER_SERVICE_HOST': '10.100.165.95',\n",
      " 'KATIB_DB_MANAGER_SERVICE_PORT': '6789',\n",
      " 'KATIB_DB_MANAGER_SERVICE_PORT_API': '6789',\n",
      " 'KATIB_MYSQL_PORT': 'tcp://10.100.54.100:3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP': 'tcp://10.100.54.100:3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_ADDR': '10.100.54.100',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_PORT': '3306',\n",
      " 'KATIB_MYSQL_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'KATIB_MYSQL_SERVICE_HOST': '10.100.54.100',\n",
      " 'KATIB_MYSQL_SERVICE_PORT': '3306',\n",
      " 'KATIB_MYSQL_SERVICE_PORT_DBAPI': '3306',\n",
      " 'KATIB_UI_PORT': 'tcp://10.100.226.28:80',\n",
      " 'KATIB_UI_PORT_80_TCP': 'tcp://10.100.226.28:80',\n",
      " 'KATIB_UI_PORT_80_TCP_ADDR': '10.100.226.28',\n",
      " 'KATIB_UI_PORT_80_TCP_PORT': '80',\n",
      " 'KATIB_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'KATIB_UI_SERVICE_HOST': '10.100.226.28',\n",
      " 'KATIB_UI_SERVICE_PORT': '80',\n",
      " 'KATIB_UI_SERVICE_PORT_UI': '80',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT': 'tcp://10.100.12.171:8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP': 'tcp://10.100.12.171:8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_ADDR': '10.100.12.171',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_PORT': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_HOST': '10.100.12.171',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_PORT': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_METRICS_SERVICE_SERVICE_PORT_HTTPS': '8443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT': 'tcp://10.100.143.28:443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP': 'tcp://10.100.143.28:443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_ADDR': '10.100.143.28',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_SERVICE_HOST': '10.100.143.28',\n",
      " 'KFSERVING_CONTROLLER_MANAGER_SERVICE_SERVICE_PORT': '443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT': 'tcp://10.100.245.215:443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP': 'tcp://10.100.245.215:443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_ADDR': '10.100.245.215',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_SERVICE_HOST': '10.100.245.215',\n",
      " 'KFSERVING_WEBHOOK_SERVER_SERVICE_SERVICE_PORT': '443',\n",
      " 'KMP_AFFINITY': 'granularity=fine,compact,1,0',\n",
      " 'KMP_BLOCKTIME': '1',\n",
      " 'KMP_DUPLICATE_LIB_OK': 'True',\n",
      " 'KMP_INIT_AT_FORK': 'FALSE',\n",
      " 'KMP_SETTINGS': '0',\n",
      " 'KUBERNETES_PORT': 'tcp://10.100.0.1:443',\n",
      " 'KUBERNETES_PORT_443_TCP': 'tcp://10.100.0.1:443',\n",
      " 'KUBERNETES_PORT_443_TCP_ADDR': '10.100.0.1',\n",
      " 'KUBERNETES_PORT_443_TCP_PORT': '443',\n",
      " 'KUBERNETES_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'KUBERNETES_SERVICE_HOST': '10.100.0.1',\n",
      " 'KUBERNETES_SERVICE_PORT': '443',\n",
      " 'KUBERNETES_SERVICE_PORT_HTTPS': '443',\n",
      " 'LANG': 'C.UTF-8',\n",
      " 'LC_ALL': 'C.UTF-8',\n",
      " 'LD_LIBRARY_PATH': '/usr/local/openmpi/lib:',\n",
      " 'METADATA_DB_PORT': 'tcp://10.100.185.144:3306',\n",
      " 'METADATA_DB_PORT_3306_TCP': 'tcp://10.100.185.144:3306',\n",
      " 'METADATA_DB_PORT_3306_TCP_ADDR': '10.100.185.144',\n",
      " 'METADATA_DB_PORT_3306_TCP_PORT': '3306',\n",
      " 'METADATA_DB_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'METADATA_DB_SERVICE_HOST': '10.100.185.144',\n",
      " 'METADATA_DB_SERVICE_PORT': '3306',\n",
      " 'METADATA_DB_SERVICE_PORT_DBAPI': '3306',\n",
      " 'METADATA_ENVOY_SERVICE_PORT': 'tcp://10.100.187.207:9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP': 'tcp://10.100.187.207:9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_ADDR': '10.100.187.207',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_PORT': '9090',\n",
      " 'METADATA_ENVOY_SERVICE_PORT_9090_TCP_PROTO': 'tcp',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_HOST': '10.100.187.207',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_PORT': '9090',\n",
      " 'METADATA_ENVOY_SERVICE_SERVICE_PORT_MD_ENVOY': '9090',\n",
      " 'METADATA_GRPC_SERVICE_PORT': 'tcp://10.100.230.18:8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP': 'tcp://10.100.230.18:8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_ADDR': '10.100.230.18',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_PORT': '8080',\n",
      " 'METADATA_GRPC_SERVICE_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_HOST': '10.100.230.18',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_PORT': '8080',\n",
      " 'METADATA_GRPC_SERVICE_SERVICE_PORT_GRPC_BACKENDAPI': '8080',\n",
      " 'METADATA_SERVICE_PORT': 'tcp://10.100.222.237:8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP': 'tcp://10.100.222.237:8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_ADDR': '10.100.222.237',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_PORT': '8080',\n",
      " 'METADATA_SERVICE_PORT_8080_TCP_PROTO': 'tcp',\n",
      " 'METADATA_SERVICE_SERVICE_HOST': '10.100.222.237',\n",
      " 'METADATA_SERVICE_SERVICE_PORT': '8080',\n",
      " 'METADATA_SERVICE_SERVICE_PORT_BACKENDAPI': '8080',\n",
      " 'METADATA_UI_PORT': 'tcp://10.100.5.220:80',\n",
      " 'METADATA_UI_PORT_80_TCP': 'tcp://10.100.5.220:80',\n",
      " 'METADATA_UI_PORT_80_TCP_ADDR': '10.100.5.220',\n",
      " 'METADATA_UI_PORT_80_TCP_PORT': '80',\n",
      " 'METADATA_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'METADATA_UI_SERVICE_HOST': '10.100.5.220',\n",
      " 'METADATA_UI_SERVICE_PORT': '80',\n",
      " 'MINIO_SERVICE_PORT': 'tcp://10.100.139.25:9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP': 'tcp://10.100.139.25:9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_ADDR': '10.100.139.25',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_PORT': '9000',\n",
      " 'MINIO_SERVICE_PORT_9000_TCP_PROTO': 'tcp',\n",
      " 'MINIO_SERVICE_SERVICE_HOST': '10.100.139.25',\n",
      " 'MINIO_SERVICE_SERVICE_PORT': '9000',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT': 'tcp://10.100.42.220:8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP': 'tcp://10.100.42.220:8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_ADDR': '10.100.42.220',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_PORT': '8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_PORT_8888_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST': '10.100.42.220',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT': '8888',\n",
      " 'ML_PIPELINE_ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT_HTTP': '8888',\n",
      " 'ML_PIPELINE_PORT': 'tcp://10.100.44.146:8888',\n",
      " 'ML_PIPELINE_PORT_8887_TCP': 'tcp://10.100.44.146:8887',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_ADDR': '10.100.44.146',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_PORT': '8887',\n",
      " 'ML_PIPELINE_PORT_8887_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_PORT_8888_TCP': 'tcp://10.100.44.146:8888',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_ADDR': '10.100.44.146',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_PORT': '8888',\n",
      " 'ML_PIPELINE_PORT_8888_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_SERVICE_HOST': '10.100.44.146',\n",
      " 'ML_PIPELINE_SERVICE_PORT': '8888',\n",
      " 'ML_PIPELINE_SERVICE_PORT_GRPC': '8887',\n",
      " 'ML_PIPELINE_SERVICE_PORT_HTTP': '8888',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT': 'tcp://10.100.204.167:80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP': 'tcp://10.100.204.167:80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_ADDR': '10.100.204.167',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_SERVICE_HOST': '10.100.204.167',\n",
      " 'ML_PIPELINE_TENSORBOARD_UI_SERVICE_PORT': '80',\n",
      " 'ML_PIPELINE_UI_PORT': 'tcp://10.100.245.123:80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP': 'tcp://10.100.245.123:80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_ADDR': '10.100.245.123',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_PORT': '80',\n",
      " 'ML_PIPELINE_UI_PORT_80_TCP_PROTO': 'tcp',\n",
      " 'ML_PIPELINE_UI_SERVICE_HOST': '10.100.245.123',\n",
      " 'ML_PIPELINE_UI_SERVICE_PORT': '80',\n",
      " 'MYSQL_PORT': 'tcp://10.100.102.255:3306',\n",
      " 'MYSQL_PORT_3306_TCP': 'tcp://10.100.102.255:3306',\n",
      " 'MYSQL_PORT_3306_TCP_ADDR': '10.100.102.255',\n",
      " 'MYSQL_PORT_3306_TCP_PORT': '3306',\n",
      " 'MYSQL_PORT_3306_TCP_PROTO': 'tcp',\n",
      " 'MYSQL_SERVICE_HOST': '10.100.102.255',\n",
      " 'MYSQL_SERVICE_PORT': '3306',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT': 'tcp://10.100.186.94:443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP': 'tcp://10.100.186.94:443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_ADDR': '10.100.186.94',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_SERVICE_HOST': '10.100.186.94',\n",
      " 'NOTEBOOK_CONTROLLER_SERVICE_SERVICE_PORT': '443',\n",
      " 'PATH': '/usr/local/openmpi/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin',\n",
      " 'PROFILES_KFAM_PORT': 'tcp://10.100.70.20:8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP': 'tcp://10.100.70.20:8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_ADDR': '10.100.70.20',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_PORT': '8081',\n",
      " 'PROFILES_KFAM_PORT_8081_TCP_PROTO': 'tcp',\n",
      " 'PROFILES_KFAM_SERVICE_HOST': '10.100.70.20',\n",
      " 'PROFILES_KFAM_SERVICE_PORT': '8081',\n",
      " 'PYTHONDONTWRITEBYTECODE': '1',\n",
      " 'PYTHONIOENCODING': 'UTF-8',\n",
      " 'PYTHONUNBUFFERED': '1',\n",
      " 'PYTORCH_OPERATOR_PORT': 'tcp://10.100.47.179:8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP': 'tcp://10.100.47.179:8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_ADDR': '10.100.47.179',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_PORT': '8443',\n",
      " 'PYTORCH_OPERATOR_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'PYTORCH_OPERATOR_SERVICE_HOST': '10.100.47.179',\n",
      " 'PYTORCH_OPERATOR_SERVICE_PORT': '8443',\n",
      " 'PYTORCH_OPERATOR_SERVICE_PORT_MONITORING_PORT': '8443',\n",
      " 'SAGEMAKER_JOB_NAME': 'tf-bert-training-eks',\n",
      " 'SAGEMAKER_TRAINING_MODULE': 'sagemaker_tensorflow_container.training:main',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT': 'tcp://10.100.52.235:443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP': 'tcp://10.100.52.235:443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_ADDR': '10.100.52.235',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_PORT': '443',\n",
      " 'SELDON_WEBHOOK_SERVICE_PORT_443_TCP_PROTO': 'tcp',\n",
      " 'SELDON_WEBHOOK_SERVICE_SERVICE_HOST': '10.100.52.235',\n",
      " 'SELDON_WEBHOOK_SERVICE_SERVICE_PORT': '443',\n",
      " 'SM_CHANNEL_TEST': '/opt/ml/input/data/test',\n",
      " 'SM_CHANNEL_TRAIN': '/opt/ml/input/data/train',\n",
      " 'SM_CHANNEL_VALIDATION': '/opt/ml/input/data/validation',\n",
      " 'SM_CURRENT_HOST': 'localhost',\n",
      " 'SM_HOSTS': '{\"hosts\":\"localhost\"}',\n",
      " 'SM_MODEL_DIR': '/opt/ml/model/',\n",
      " 'SM_NUM_GPUS': '0',\n",
      " 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data/',\n",
      " 'SM_OUTPUT_DIR': '/opt/ml/output/',\n",
      " 'SM_TRAINING_ENV': '{\"is_master\":true}',\n",
      " 'TENSORBOARD_PORT': 'tcp://10.100.185.101:9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP': 'tcp://10.100.185.101:9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP_ADDR': '10.100.185.101',\n",
      " 'TENSORBOARD_PORT_9000_TCP_PORT': '9000',\n",
      " 'TENSORBOARD_PORT_9000_TCP_PROTO': 'tcp',\n",
      " 'TENSORBOARD_SERVICE_HOST': '10.100.185.101',\n",
      " 'TENSORBOARD_SERVICE_PORT': '9000',\n",
      " 'TENSORBOARD_SERVICE_PORT_TB': '9000',\n",
      " 'TF_JOB_OPERATOR_PORT': 'tcp://10.100.5.156:8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP': 'tcp://10.100.5.156:8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_ADDR': '10.100.5.156',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_PORT': '8443',\n",
      " 'TF_JOB_OPERATOR_PORT_8443_TCP_PROTO': 'tcp',\n",
      " 'TF_JOB_OPERATOR_SERVICE_HOST': '10.100.5.156',\n",
      " 'TF_JOB_OPERATOR_SERVICE_PORT': '8443',\n",
      " 'TF_JOB_OPERATOR_SERVICE_PORT_MONITORING_PORT': '8443'}\n",
      "SM_TRAINING_ENV {\"is_master\":true}\n",
      "is_master True\n",
      "train_data /opt/ml/input/data/train\n",
      "validation_data /opt/ml/input/data/validation\n",
      "test_data /opt/ml/input/data/test\n",
      "output_dir /opt/ml/output/\n",
      "hosts {'hosts': 'localhost'}\n",
      "current_host localhost\n",
      "num_gpus 0\n",
      "job_name tf-bert-training-eks\n",
      "use_xla True\n",
      "use_amp False\n",
      "max_seq_length 64\n",
      "train_batch_size 36\n",
      "validation_batch_size 18\n",
      "test_batch_size 18\n",
      "epochs 1\n",
      "learning_rate 1e-05\n",
      "epsilon 1e-08\n",
      "train_steps_per_epoch 1\n",
      "validation_steps 1\n",
      "test_steps 1\n",
      "freeze_bert_layer True\n",
      "enable_sagemaker_debugger False\n",
      "run_validation True\n",
      "run_test False\n",
      "run_sample_predictions False\n",
      "2020-10-29 21:02:52.301565: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F\n",
      "enable_tensorboard False\n",
      "enable_checkpointing False\n",
      "checkpoint_base_path /opt/ml/checkpoints\n",
      "checkpoint_path /opt/ml/checkpoints\n",
      "Using pipe_mode: False\n",
      "2020-10-29 21:02:52.307926: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2999995000 Hz\n",
      "2020-10-29 21:02:52.308162: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x425f7a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
      "2020-10-29 21:02:52.308181: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version\n",
      "2020-10-29 21:02:52.308282: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.\n",
      "WARNING:tensorflow:There are non-GPU devices in `tf.distribute.Strategy`, not using nccl allreduce.\n",
      "train_data_filenames ['/opt/ml/input/data/train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord', '/opt/ml/input/data/train/part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord']\n",
      "***** Using input_filenames ['/opt/ml/input/data/train/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord', '/opt/ml/input/data/train/part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord']\n",
      "WARNING:tensorflow:From /opt/ml/code/train.py:88: map_and_batch (from tensorflow.python.data.experimental.ops.batching) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.\n",
      "**************** train *****************\n",
      "{'input_ids': array([[  101,  2065,  2045, ...,     0,     0,     0],\n",
      "       [  101,  1045,  4299, ...,   102,     0,     0],\n",
      "       [  101,  6373,  2252, ...,     0,     0,     0],\n",
      "       ...,\n",
      "       [  101,  4007,  2573, ...,  2185,  2007,   102],\n",
      "       [  101, 10770,  9475, ...,  3071,  1012,   102],\n",
      "       [  101,  1996,  2069, ...,     0,     0,     0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([0, 1, 3, 4, 0, 3, 4, 4, 2, 4, 2, 0, 2, 4, 3, 3, 2, 4, 2, 4, 2, 4,\n",
      "       4, 3, 3, 4, 4, 2, 4, 3, 2, 0, 1, 0, 4, 3]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 4569, 2050, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2031, ..., 2412, 5278,  102],\n",
      "       [ 101, 2134, 1005, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 2097, 2196, ..., 1996, 2208,  102],\n",
      "       [ 101, 2043, 1045, ..., 1999, 1996,  102],\n",
      "       [ 101, 2018, 2000, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([1, 0, 2, 4, 0, 4, 0, 2, 3, 0, 0, 2, 0, 4, 1, 1, 3, 0, 1, 3, 4, 0,\n",
      "       3, 4, 4, 2, 4, 2, 0, 2, 4, 3, 3, 2, 4, 2]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 6581,  102, ...,    0,    0,    0],\n",
      "       [ 101, 2023, 2003, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2066, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 1045, 3641, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ..., 1012, 1045,  102],\n",
      "       [ 101, 2092, 1012, ..., 1012, 2034,  102]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 1, 1, 1]]), 'label_ids': array([3, 4, 4, 2, 4, 2, 0, 2, 4, 3, 3, 2, 4, 2, 4, 2, 4, 4, 3, 3, 4, 4,\n",
      "       2, 4, 3, 2, 0, 1, 0, 4, 3, 4, 1, 3, 3, 0]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[  101,  2017,  2123, ...,     0,     0,     0],\n",
      "       [  101,  2079,  2025, ...,  2033,  2055,   102],\n",
      "       [  101, 12202,  2003, ...,  7595,  2006,   102],\n",
      "       ...,\n",
      "       [  101,  2023,  2003, ...,     0,     0,     0],\n",
      "       [  101,  2561, 19380, ...,  2008,  2052,   102],\n",
      "       [  101,  2224,  2009, ...,     0,     0,     0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([2, 0, 2, 4, 3, 3, 2, 4, 2, 4, 2, 4, 4, 3, 3, 4, 4, 2, 4, 3, 2, 0,\n",
      "       1, 0, 4, 3, 4, 1, 3, 3, 0, 2, 2, 4, 1, 4]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 1045, 2031, ..., 2412, 5278,  102],\n",
      "       [ 101, 2134, 1005, ...,    0,    0,    0],\n",
      "       [ 101, 2204, 2565, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 2043, 1045, ..., 1999, 1996,  102],\n",
      "       [ 101, 2018, 2000, ...,    0,    0,    0],\n",
      "       [ 101, 2009, 1005, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([0, 2, 4, 0, 4, 0, 2, 3, 0, 0, 2, 0, 4, 1, 1, 3, 0, 1, 3, 4, 0, 3,\n",
      "       4, 4, 2, 4, 2, 0, 2, 4, 3, 3, 2, 4, 2, 4]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[  101, 10770,  9475, ...,  3071,  1012,   102],\n",
      "       [  101,  1996,  2069, ...,     0,     0,     0],\n",
      "       [  101,  2307,  3976, ...,     0,     0,     0],\n",
      "       ...,\n",
      "       [  101,  2065,  2045, ...,     0,     0,     0],\n",
      "       [  101,  1045,  4299, ...,   102,     0,     0],\n",
      "       [  101,  6373,  2252, ...,     0,     0,     0]]), 'input_mask': array([[1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([4, 3, 4, 1, 3, 3, 0, 2, 2, 4, 1, 4, 3, 3, 0, 4, 1, 0, 2, 4, 0, 4,\n",
      "       0, 2, 3, 0, 0, 2, 0, 4, 1, 1, 3, 0, 1, 3]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "Downloading: 100%|██████████| 232k/232k [00:00<00:00, 720kB/s] \n",
      "Downloading: 100%|██████████| 442/442 [00:00<00:00, 512kB/s]\n",
      "Downloading: 100%|██████████| 363M/363M [00:16<00:00, 21.5MB/s] \n",
      "2020-10-29 21:03:12.267367: W tensorflow/python/util/util.cc:319] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
      "Sucessfully downloaded after 0 retries.\n",
      "** use_amp False\n",
      "enable_sagemaker_debugger False\n",
      "*** OPTIMIZER <tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x7fcc4049c9e8> ***\n",
      "Compiled model <transformers.modeling_tf_distilbert.TFDistilBertForSequenceClassification object at 0x7fcc404569e8>\n",
      "Model: \"tf_distil_bert_for_sequence_classification\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "distilbert (TFDistilBertMain multiple                  66362880  \n",
      "_________________________________________________________________\n",
      "pre_classifier (Dense)       multiple                  590592    \n",
      "_________________________________________________________________\n",
      "classifier (Dense)           multiple                  3845      \n",
      "_________________________________________________________________\n",
      "dropout_19 (Dropout)         multiple                  0         \n",
      "=================================================================\n",
      "Total params: 66,957,317\n",
      "Trainable params: 594,437\n",
      "Non-trainable params: 66,362,880\n",
      "_________________________________________________________________\n",
      "None\n",
      "validation_data_filenames ['/opt/ml/input/data/validation/part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord', '/opt/ml/input/data/validation/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord']\n",
      "***** Using input_filenames ['/opt/ml/input/data/validation/part-algo-2-amazon_reviews_us_Digital_Video_Games_v1_00.tfrecord', '/opt/ml/input/data/validation/part-algo-1-amazon_reviews_us_Digital_Software_v1_00.tfrecord']\n",
      "**************** validation *****************\n",
      "{'input_ids': array([[ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       ...,\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       ...,\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       ...,\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "{'input_ids': array([[ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0],\n",
      "       [ 101, 9733, 2001, ..., 1998, 4149,  102],\n",
      "       ...,\n",
      "       [ 101, 2042, 2478, ...,    0,    0,    0],\n",
      "       [ 101, 2017, 2031, ...,    0,    0,    0],\n",
      "       [ 101, 1045, 2572, ...,    0,    0,    0]]), 'input_mask': array([[1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 1, 1, 1],\n",
      "       ...,\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0],\n",
      "       [1, 1, 1, ..., 0, 0, 0]]), 'label_ids': array([1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2]), 'segment_ids': array([[0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       ...,\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0],\n",
      "       [0, 0, 0, ..., 0, 0, 0]])}\n",
      "Starting Training and Validation...\n",
      "Train for 1 steps, validate for 1 steps\n",
      "2020-10-29 21:03:23.195211: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1574] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.\n",
      "1/1 [==============================] - 14s 14s/step - loss: 1.6096 - accuracy: 0.3056 - val_loss: 1.6394 - val_accuracy: 0.2778\n",
      "2020-10-29 21:03:31.093903: W tensorflow/core/kernels/data/generator_dataset_op.cc:103] Error occurred when finalizing GeneratorDataset iterator: Cancelled: Operation was cancelled\n",
      "<tensorflow.python.keras.callbacks.History object at 0x7fcc18046400>\n",
      "transformer_fine_tuned_model_path /opt/ml/model/transformers/fine-tuned/\n",
      "INFO:transformers.configuration_utils:Configuration saved in /opt/ml/model/transformers/fine-tuned/config.json\n",
      "INFO:transformers.modeling_tf_utils:Model weights saved in /opt/ml/model/transformers/fine-tuned/tf_model.h5\n",
      "tensorflow_saved_model_path /opt/ml/model/tensorflow/saved_model/0\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403ffac8>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403ffac8>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403fc780>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403fc780>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403cb6a0>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403cb6a0>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403d8d68>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc403d8d68>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc40061470>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc40061470>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc4006fb38>, because it is not built.\n",
      "WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dropout object at 0x7fcc4006fb38>, because it is not built.\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "If using Keras pass *_constraint arguments to layers.\n",
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "If using Keras pass *_constraint arguments to layers.\n",
      "INFO:tensorflow:Assets written to: /opt/ml/model/tensorflow/saved_model/0/assets\n",
      "INFO:tensorflow:Assets written to: /opt/ml/model/tensorflow/saved_model/0/assets\n"
     ]
    }
   ],
   "source": [
    "!kubectl logs -f bert-csi-fsx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                        PROVISIONER                                                RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE\n",
      "ebs-sc (default)            ebs.csi.aws.com                                            Delete          WaitForFirstConsumer   false                  6h36m\n",
      "fsx-sc                      fsx.csi.aws.com                                            Delete          Immediate              false                  114m\n",
      "local-hostpath              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  7h59m\n",
      "openebs-device              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  8h\n",
      "openebs-hostpath            openebs.io/local                                           Delete          WaitForFirstConsumer   false                  8h\n",
      "openebs-jiva-default        openebs.io/provisioner-iscsi                               Delete          Immediate              false                  8h\n",
      "openebs-snapshot-promoter   volumesnapshot.external-storage.k8s.io/snapshot-promoter   Delete          Immediate              false                  8h\n"
     ]
    }
   ],
   "source": [
    "!kubectl get sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                          STORAGECLASS     REASON   AGE\n",
      "pvc-097f3863-4fa9-4303-9195-d84d51a89985   20Gi       RWO            Delete           Bound    kubeflow/mysql-pv-claim        gp2                       54d\n",
      "pvc-12f27651-af46-4dfb-a181-5c768c87add9   5Gi        RWO            Delete           Bound    kubeflow/ebs-claim             ebs-sc                    6h29m\n",
      "pvc-36899807-7262-46fe-9627-5b3338abd535   10Gi       RWO            Delete           Bound    kubeflow/katib-mysql           gp2                       54d\n",
      "pvc-47f293c3-6406-4e1d-ac68-97190bbe8e5f   10Gi       RWO            Delete           Bound    anonymous/workspace-notebook   gp2                       54d\n",
      "pvc-64da95de-d5fc-4df6-b449-19439dbc7345   20Gi       RWO            Delete           Bound    kubeflow/minio-pv-claim        gp2                       54d\n",
      "pvc-a252cad0-6cc0-4f08-8c81-d0e257849351   5G         RWO            Delete           Bound    kubeflow/local-hostpath-pvc    local-hostpath            7h53m\n",
      "pvc-c2fa8c1a-5422-473e-aab9-802c378749c1   1200Gi     RWX            Delete           Bound    kubeflow/fsx-claim             fsx-sc                    108m\n",
      "pvc-ca36d136-cbfb-4466-9152-2cd389f32fef   10Gi       RWO            Delete           Bound    kubeflow/metadata-mysql        gp2                       54d\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:            pvc-c2fa8c1a-5422-473e-aab9-802c378749c1\n",
      "Labels:          <none>\n",
      "Annotations:     pv.kubernetes.io/provisioned-by: fsx.csi.aws.com\n",
      "Finalizers:      [kubernetes.io/pv-protection]\n",
      "StorageClass:    fsx-sc\n",
      "Status:          Bound\n",
      "Claim:           kubeflow/fsx-claim\n",
      "Reclaim Policy:  Delete\n",
      "Access Modes:    RWX\n",
      "VolumeMode:      Filesystem\n",
      "Capacity:        1200Gi\n",
      "Node Affinity:   <none>\n",
      "Message:         \n",
      "Source:\n",
      "    Type:              CSI (a Container Storage Interface (CSI) volume source)\n",
      "    Driver:            fsx.csi.aws.com\n",
      "    VolumeHandle:      fs-0ec1bd32928faa01f\n",
      "    ReadOnly:          false\n",
      "    VolumeAttributes:      dnsname=fs-0ec1bd32928faa01f.fsx.us-west-2.amazonaws.com\n",
      "                           mountname=6uzhvbmv\n",
      "                           storage.kubernetes.io/csiProvisionerIdentity=1603983866621-8081-fsx.csi.aws.com\n",
      "Events:                <none>\n"
     ]
    }
   ],
   "source": [
    "# Resolve the volume name from the claim: dynamically provisioned PVs get a newly generated name\n",
    "# each time the claim is re-created, so a hard-coded PV name goes stale on re-run.\n",
    "# Doubled braces keep IPython from treating the jsonpath template as a Python expression.\n",
    "!kubectl describe pv $(kubectl get pvc fsx-claim -o jsonpath='{{.spec.volumeName}}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS     AGE\n",
      "ebs-claim            Bound    pvc-12f27651-af46-4dfb-a181-5c768c87add9   5Gi        RWO            ebs-sc           6h34m\n",
      "fsx-claim            Bound    pvc-c2fa8c1a-5422-473e-aab9-802c378749c1   1200Gi     RWX            fsx-sc           115m\n",
      "katib-mysql          Bound    pvc-36899807-7262-46fe-9627-5b3338abd535   10Gi       RWO            gp2              54d\n",
      "local-hostpath-pvc   Bound    pvc-a252cad0-6cc0-4f08-8c81-d0e257849351   5G         RWO            local-hostpath   7h59m\n",
      "metadata-mysql       Bound    pvc-ca36d136-cbfb-4466-9152-2cd389f32fef   10Gi       RWO            gp2              54d\n",
      "minio-pv-claim       Bound    pvc-64da95de-d5fc-4df6-b449-19439dbc7345   20Gi       RWO            gp2              54d\n",
      "mysql-pv-claim       Bound    pvc-097f3863-4fa9-4303-9195-d84d51a89985   20Gi       RWO            gp2              54d\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pvc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:          fsx-claim\n",
      "Namespace:     kubeflow\n",
      "StorageClass:  fsx-sc\n",
      "Status:        Bound\n",
      "Volume:        pvc-c2fa8c1a-5422-473e-aab9-802c378749c1\n",
      "Labels:        <none>\n",
      "Annotations:   kubectl.kubernetes.io/last-applied-configuration:\n",
      "                 {\"apiVersion\":\"v1\",\"kind\":\"PersistentVolumeClaim\",\"metadata\":{\"annotations\":{},\"name\":\"fsx-claim\",\"namespace\":\"kubeflow\"},\"spec\":{\"accessM...\n",
      "               pv.kubernetes.io/bind-completed: yes\n",
      "               pv.kubernetes.io/bound-by-controller: yes\n",
      "               volume.beta.kubernetes.io/storage-provisioner: fsx.csi.aws.com\n",
      "Finalizers:    [kubernetes.io/pvc-protection]\n",
      "Capacity:      1200Gi\n",
      "Access Modes:  RWX\n",
      "VolumeMode:    Filesystem\n",
      "Mounted By:    bert-csi-fsx\n",
      "Events:        <none>\n"
     ]
    }
   ],
   "source": [
    "!kubectl describe pvc fsx-claim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "\n",
    "# Amazon FSx API client; region and credentials are taken from boto3's default session.\n",
    "fsx = boto3.client(service_name='fsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'FileSystems': [{'OwnerId': '231218423789', 'CreationTime': datetime.datetime(2020, 10, 29, 19, 33, 56, 351000, tzinfo=tzlocal()), 'FileSystemId': 'fs-01fd6f8121cbf869e', 'FileSystemType': 'LUSTRE', 'Lifecycle': 'CREATING', 'StorageCapacity': 1200, 'StorageType': 'SSD', 'VpcId': 'vpc-05cd6f5a363b289c2', 'SubnetIds': ['subnet-0394df757ad94fdf0'], 'NetworkInterfaceIds': ['eni-09d83e7856a9a89ae', 'eni-06bce9dfb1e6ec548'], 'DNSName': 'fs-01fd6f8121cbf869e.fsx.us-west-2.amazonaws.com', 'KmsKeyId': 'arn:aws:kms:us-west-2:231218423789:key/3b1402d5-3a3d-43ab-9f5e-7ce68d1d7cde', 'ResourceARN': 'arn:aws:fsx:us-west-2:231218423789:file-system/fs-01fd6f8121cbf869e', 'Tags': [{'Key': 'Name', 'Value': 'test'}], 'LustreConfiguration': {'WeeklyMaintenanceStartTime': '5:06:30', 'DataRepositoryConfiguration': {'Lifecycle': 'CREATING', 'ImportPath': 's3://fsx-csi-231218423789', 'ExportPath': 's3://fsx-csi-231218423789/', 'ImportedFileChunkSize': 1024, 'AutoImportPolicy': 'NEW_CHANGED'}, 'DeploymentType': 'PERSISTENT_1', 'PerUnitStorageThroughput': 50, 'MountName': 'tefhvbmv', 'CopyTagsToBackups': False}}], 'ResponseMetadata': {'RequestId': 'c06d7afd-3204-4017-bfa0-fb23f614e286', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 29 Oct 2020 19:35:36 GMT', 'content-type': 'application/x-amz-json-1.1', 'content-length': '998', 'connection': 'keep-alive', 'x-amzn-requestid': 'c06d7afd-3204-4017-bfa0-fb23f614e286'}, 'RetryAttempts': 0}}\n"
     ]
    }
   ],
   "source": [
    "# Describe one FSx file system by ID and print the raw API response.\n",
    "file_system_ids = ['fs-01fd6f8121cbf869e']\n",
    "response = fsx.describe_file_systems(FileSystemIds=file_system_ids)\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'FileSystem': {'OwnerId': '231218423789', 'CreationTime': datetime.datetime(2020, 10, 29, 19, 47, 21, 966000, tzinfo=tzlocal()), 'FileSystemId': 'fs-0814c59615405ca30', 'FileSystemType': 'LUSTRE', 'Lifecycle': 'AVAILABLE', 'StorageCapacity': 1200, 'StorageType': 'SSD', 'VpcId': 'vpc-05cd6f5a363b289c2', 'SubnetIds': ['subnet-0394df757ad94fdf0'], 'NetworkInterfaceIds': ['eni-0af3120d5487ddc63', 'eni-068153de55532aa81'], 'DNSName': 'fs-0814c59615405ca30.fsx.us-west-2.amazonaws.com', 'ResourceARN': 'arn:aws:fsx:us-west-2:231218423789:file-system/fs-0814c59615405ca30', 'Tags': [{'Key': 'CSIVolumeName', 'Value': 'pvc-61624c47-dd5b-4214-9455-c55560294d53'}], 'LustreConfiguration': {'WeeklyMaintenanceStartTime': '4:07:30', 'DataRepositoryConfiguration': {'Lifecycle': 'UPDATING', 'ImportPath': 's3://fsx-csi-231218423789', 'ExportPath': 's3://fsx-csi-231218423789', 'ImportedFileChunkSize': 1024, 'AutoImportPolicy': 'NEW_CHANGED'}, 'DeploymentType': 'SCRATCH_2', 'MountName': 'eigxvbmv', 'CopyTagsToBackups': False}}, 'ResponseMetadata': {'RequestId': '8f309047-e32f-4b97-bcba-efab614e983e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 29 Oct 2020 20:21:46 GMT', 'content-type': 'application/x-amz-json-1.1', 'content-length': '918', 'connection': 'keep-alive', 'x-amzn-requestid': '8f309047-e32f-4b97-bcba-efab614e983e'}, 'RetryAttempts': 0}}\n"
     ]
    }
   ],
   "source": [
    "# Resolve the FSx file system behind the `fsx-claim` PVC instead of hard-coding its ID:\n",
    "# a dynamically provisioned file system gets a new ID whenever the claim is re-created.\n",
    "# Doubled braces keep IPython from expanding the jsonpath template as a Python expression.\n",
    "pv_name = !kubectl get pvc fsx-claim -o jsonpath='{{.spec.volumeName}}'\n",
    "file_system_id = !kubectl get pv {pv_name[0]} -o jsonpath='{{.spec.csi.volumeHandle}}'\n",
    "\n",
    "# Set the auto-import policy on the linked S3 data repository to NEW_CHANGED.\n",
    "response = fsx.update_file_system(\n",
    "    FileSystemId=file_system_id[0],\n",
    "    LustreConfiguration={\n",
    "        'AutoImportPolicy': 'NEW_CHANGED'\n",
    "    }\n",
    ")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Client Version: version.Info{Major:\"1\", Minor:\"15+\", GitVersion:\"v1.15.10-eks-bac369\", GitCommit:\"bac3690554985327ae4d13e42169e8b1c2f37226\", GitTreeState:\"clean\", BuildDate:\"2020-02-21T23:37:18Z\", GoVersion:\"go1.12.12\", Compiler:\"gc\", Platform:\"linux/amd64\"}\n",
      "Server Version: version.Info{Major:\"1\", Minor:\"17+\", GitVersion:\"v1.17.9-eks-4c6976\", GitCommit:\"4c6976793196d70bc5cd29d56ce5440c9473648e\", GitTreeState:\"clean\", BuildDate:\"2020-07-17T18:46:04Z\", GoVersion:\"go1.13.9\", Compiler:\"gc\", Platform:\"linux/amd64\"}\n"
     ]
    }
   ],
   "source": [
    "!kubectl version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
